Computer Vision Assignment

This assignment is divided into 2 parts:

Part 1: Creating a baseline model with no feature implementation
Part 2: Applying necessary filters to obtain improved accuracy

Part 1: Creating a baseline model with no feature implementation

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize

def prep_data (folder):
    
    height = 128
    width = 128
    class_id = 0
    classnames = []
    features = []
    labels = np.array([])
    for root, dirs, filenames in os.walk(folder):
        for d in sorted(dirs):
            print("Reading data from", d)
            # use the folder name as the class name for this label
            classnames.append(d)
            files = os.listdir(os.path.join(root,d))
            for f in files:
                # Load the image file
                imgFile = os.path.join(root,d, f)
                img = plt.imread(imgFile)
                img = resize(img, (height, width), anti_aliasing=True)
                features.append(img.ravel())
                
                # Add it to the numpy array of labels
                labels = np.append(labels, class_id )
            class_id  += 1
            
    # Convert the list of features into a numpy array
    features = np.array(features)
    
    return features, labels, classnames


# The images are in a folder named 'shapes/training'
training_folder_name = './data/voc/'

# Prepare the image data
features, labels, classnames = prep_data(training_folder_name)
print(len(features), 'features')
print(len(labels), 'labels')
print(len(classnames), 'classes:', classnames)
Reading data from automobile
Reading data from plane
Reading data from train
240 features
240 labels
3 classes: ['automobile', 'plane', 'train']
In [2]:
print('Feature Shape:',features.shape)
print('Labels Shape:',labels.shape)
Feature Shape: (240, 49152)
Labels Shape: (240,)

Due to the lesser number of training images, I have used:

1. Only 10% of the images for validation
2. Cross-validation to technique to overcome overfitting
In [3]:
# split into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(features, labels, test_size=0.10, random_state = 40)

print('Training records:',Y_train.size)
print('Validation records:',Y_test.size)
Training records: 216
Validation records: 24

Different models have been tried to extract the best accuracy. The models used are:

  1. Decision Trees
  2. K Nearest Neightbours
  3. SVC
  4. Gaussian Naive Bayes
  5. Multinomial Naive Bayes
  6. Bernoulli Naive Bayes
  7. Random Forest

The random seed was set to replicate the output.

In [4]:
import random
random.seed(123)

from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score

names = ["Decision Tree","K Nearest Neighbors", "SVC", "Gaussian Naive Bayes", 
         "Multinomial Naive Bayes","Bernoulli Naive Bayes", "Random Forest"]

classifiers = [DecisionTreeClassifier(), 
               KNeighborsClassifier(), SVC(), 
               GaussianNB(), MultinomialNB(), BernoulliNB(), RandomForestClassifier()]

for name, clf in zip(names, classifiers):
    X_train_float = X_train.astype('float64')
    img_pipeline = Pipeline([('norm', MinMaxScaler()), ('classify', clf),])
    clf = img_pipeline.fit(X_train_float, Y_train)
    X_test_float = X_test.astype('float64')
    predictions = clf.predict(X_test)
    predictions = cross_val_predict(clf,X_test,Y_test,cv=3)
    acc = metrics.accuracy_score(Y_test, predictions)
    print(name, '\nAccuracy: {:.2%}'.format(acc))
    
    print('*-----------------------------------------------------------------------------------------------------*')
Decision Tree 
Accuracy: 70.83%
*-----------------------------------------------------------------------------------------------------*
K Nearest Neighbors 
Accuracy: 70.83%
*-----------------------------------------------------------------------------------------------------*
SVC 
Accuracy: 79.17%
*-----------------------------------------------------------------------------------------------------*
Gaussian Naive Bayes 
Accuracy: 75.00%
*-----------------------------------------------------------------------------------------------------*
Multinomial Naive Bayes 
Accuracy: 79.17%
*-----------------------------------------------------------------------------------------------------*
Bernoulli Naive Bayes 
Accuracy: 41.67%
*-----------------------------------------------------------------------------------------------------*
Random Forest 
Accuracy: 83.33%
*-----------------------------------------------------------------------------------------------------*

It can be observed that none of the models show good accuracy.

Let's see how the predictions appear for each model

In [5]:
# Helper function to resize image
def resize_image(src_img, size=(128,128), bg_color="white"): 
    from PIL import Image
    
    # rescale the image so the longest edge is the right size
    src_img.thumbnail(size, Image.ANTIALIAS)
    
    # Create a new image of the right shape
    new_image = Image.new("RGB", size, bg_color)
    
    # Paste the rescaled image onto the new background
    new_image.paste(src_img, (int((size[0] - src_img.size[0]) / 2), int((size[1] - src_img.size[1]) / 2)))
  
    # return the resized image
    return new_image

# Function to predict the class of an image
def predict_image(classifier, image_array):
    import numpy as np
    
    # These are the classes our model can predict
    classnames = ['automobile', 'plane', 'train']
    
    # Predict the class of each input image
    predictions = classifier.predict(image_array)
    
    predicted_classes = []
    for prediction in predictions:
        # And append the corresponding class name to the results
        predicted_classes.append(classnames[int(prediction)])
    # Return the predictions
    return predicted_classes
In [6]:
from PIL import Image
names = ["DT","KNN", "SVC", "GNB", "MNB","BNB", "RF"]

for name, clf in zip(names, classifiers):
    #get the list of test image files
    test_folder = './data/object_detection/test'
    test_image_files = os.listdir(test_folder)

    # Empty array on which to store the images
    image_arrays = []

    size = (128,128)
    background_color = "white"

    fig = plt.figure(figsize=(30, 20))

    # Get the images and show the predicted classes
    for file_idx in range(len(test_image_files)):
        img = Image.open(os.path.join(test_folder, test_image_files[file_idx]))

        # resize the image so it matches the training set - it  must be the same size as the images on which the model was trained
        resized_img = np.array(resize_image(img, size, background_color))

        img_shape = np.array(resized_img).shape

        # Add the image to the array of images
        image_arrays.append(resized_img.ravel())

# Get predictions from the array of image arrays
# Note that the model expects an array of 1 or more images - just like the batches on which it was trained

    predictions = predict_image(clf, np.array(image_arrays))
    # plot each image with its corresponding prediction
    for idx in range(len(predictions)):
        a=fig.add_subplot(1,len(predictions),idx+1)
        img = image_arrays[idx].reshape(img_shape)
        imgplot = plt.imshow(img)
        a.set_title(predictions[idx]+" "+name)

It appears that the predictions are randomly done. Now, this accuracy will be compared after applying the necessary filters.

Part 2: Applying necessary filters to obtain improved accuracy

In [7]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
from skimage.color import rgb2gray

def prep_data (folder):
    
    height = 128
    width = 128
    class_id = 0
    classnames = []
    features = []
    labels = np.array([])
    for root, dirs, filenames in os.walk(folder):
        for d in sorted(dirs):
            print("Reading data from", d)
            # use the folder name as the class name for this label
            classnames.append(d)
            files = os.listdir(os.path.join(root,d))
            for f in files:
                # Load the image file
                imgFile = os.path.join(root,d, f)
                img = plt.imread(imgFile)
                img = rgb2gray(img)
                height = 128
                width = 128
                dim = (height,width)
                resized = cv2.resize(img, dim, interpolation = cv2.INTER_AREA)
                #resized = resize(resized, dim, anti_aliasing=True)
                #sobel edge detection
                sobelx = cv2.Sobel(resized,cv2.CV_64F,1,0,ksize=5)
                sobely = cv2.Sobel(resized,cv2.CV_64F,0,1,ksize=5)
                #print(sobelx.type)
                #magniture
                sobel=np.sqrt(np.multiply(sobelx,sobelx)+np.multiply(sobely,sobely))
                vis = np.concatenate((resized, sobel), axis=0)
                features.append(vis.ravel())
                
                # Add it to the numpy array of labels
                labels = np.append(labels, class_id )
            class_id  += 1
            
    # Convert the list of features into a numpy array
    features = np.array(features)
    
    return features, labels, classnames


# The images are in a folder named 'shapes/training'
training_folder_name = './data/voc/'

# Prepare the image data
features, labels, classnames = prep_data(training_folder_name)
print(len(features), 'features')
print(len(labels), 'labels')
print(len(classnames), 'classes:', classnames)
Reading data from automobile
Reading data from plane
Reading data from train
240 features
240 labels
3 classes: ['automobile', 'plane', 'train']
In [8]:
print('Feature Shape:',features.shape)
print('Labels Shape:',labels.shape)
Feature Shape: (240, 32768)
Labels Shape: (240,)
In [9]:
import random
random.seed(123)

from sklearn import metrics
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import precision_score, recall_score, f1_score

names = ["Decision Tree","K Nearest Neighbors", "SVC", "Gaussian Naive Bayes", 
         "Multinomial Naive Bayes","Bernoulli Naive Bayes", "Random Forest"]

classifiers = [DecisionTreeClassifier(), 
               KNeighborsClassifier(), SVC(), 
               GaussianNB(), MultinomialNB(), BernoulliNB(), RandomForestClassifier()]

for name, clf in zip(names, classifiers):
    X_train_float = X_train.astype('float64')
    img_pipeline = Pipeline([('norm', MinMaxScaler()), ('classify', clf),])
    clf = img_pipeline.fit(X_train_float, Y_train)
    X_test_float = X_test.astype('float64')
    predictions = clf.predict(X_test)
    predictions = cross_val_predict(clf,X_test,Y_test,cv=3)
    acc = metrics.accuracy_score(Y_test, predictions)

    print(name, '\nAccuracy: {:.2%}'.format(acc))
    
    print('*-----------------------------------------------------------------------------------------------------*')
Decision Tree 
Accuracy: 66.67%
*-----------------------------------------------------------------------------------------------------*
K Nearest Neighbors 
Accuracy: 70.83%
*-----------------------------------------------------------------------------------------------------*
SVC 
Accuracy: 79.17%
*-----------------------------------------------------------------------------------------------------*
Gaussian Naive Bayes 
Accuracy: 75.00%
*-----------------------------------------------------------------------------------------------------*
Multinomial Naive Bayes 
Accuracy: 79.17%
*-----------------------------------------------------------------------------------------------------*
Bernoulli Naive Bayes 
Accuracy: 41.67%
*-----------------------------------------------------------------------------------------------------*
Random Forest 
Accuracy: 75.00%
*-----------------------------------------------------------------------------------------------------*

It can be seen that Random Forest and Multinomial Naice Bayes models provide the best accuracy

In [10]:
# Helper function to resize image
def resize_image(src_img, size=(128,128), bg_color="white"): 
    from PIL import Image

    # rescale the image so the longest edge is the right size
    src_img.thumbnail(size, Image.ANTIALIAS)
    
    # Create a new image of the right shape
    new_image = Image.new("RGB", size, bg_color)
    
    # Paste the rescaled image onto the new background
    new_image.paste(src_img, (int((size[0] - src_img.size[0]) / 2), int((size[1] - src_img.size[1]) / 2)))
  
    # return the resized image
    return new_image

# Function to predict the class of an image
def predict_image(classifier, image_array):
    import numpy as np
    
    # These are the classes our model can predict
    classnames = ['automobile', 'plane', 'train']
    
    # Predict the class of each input image
    predictions = classifier.predict(image_array)
    
    predicted_classes = []
    for prediction in predictions:
        # And append the corresponding class name to the results
        predicted_classes.append(classnames[int(prediction)])
    # Return the predictions
    return predicted_classes

print("Functions created - ready to use model for inference.")
Functions created - ready to use model for inference.
In [11]:
from PIL import Image
names = ["DT","KNN", "SVC", "GNB", "MNB","BNB", "RF"]

for name, clf in zip(names, classifiers):
    #get the list of test image files
    test_folder = './data/object_detection/test'
    test_image_files = os.listdir(test_folder)

    # Empty array on which to store the images
    image_arrays = []

    size = (128,128)
    background_color = "white"

    fig = plt.figure(figsize=(30, 20))

    # Get the images and show the predicted classes
    for file_idx in range(len(test_image_files)):
        img = Image.open(os.path.join(test_folder, test_image_files[file_idx]))

        # resize the image so it matches the training set - it  must be the same size as the images on which the model was trained
        resized_img = np.array(resize_image(img, size, background_color))

        img_shape = np.array(resized_img).shape

        # Add the image to the array of images
        image_arrays.append(resized_img.ravel())

# Get predictions from the array of image arrays
# Note that the model expects an array of 1 or more images - just like the batches on which it was trained

    predictions = predict_image(clf, np.array(image_arrays))
    # plot each image with its corresponding prediction
    for idx in range(len(predictions)):
        a=fig.add_subplot(1,len(predictions),idx+1)
        img = image_arrays[idx].reshape(img_shape)
        imgplot = plt.imshow(img)
        a.set_title(predictions[idx]+" "+name)

From the above predictions, it can be seen that the Multinomial Naive Bayes model predicts better than the remaining model. With more number of training data, the model would be able to predict better.